library(ggplot2)
source("process_data.R")
# Columns shown in the pairs plots: every column of `store` except Year and
# Month. `store` is not defined in this file; presumably it is created by
# process_data.R (confirm there).
base_columns <- local({
  all_columns <- names(store)
  all_columns[!(all_columns %in% c("Year", "Month"))]
})

# Scatterplot matrix of `store` restricted to the base columns.
GGally::ggpairs(data = store, columns = base_columns)
plot: [1,1] [==-------------------------------------------------------] 4% est: 0s
plot: [1,2] [=====----------------------------------------------------] 8% est: 6s
plot: [1,3] [=======--------------------------------------------------] 12% est: 5s
plot: [1,4] [=========------------------------------------------------] 16% est: 4s
plot: [1,5] [===========----------------------------------------------] 20% est: 3s
plot: [2,1] [==============-------------------------------------------] 24% est: 3s
plot: [2,2] [================-----------------------------------------] 28% est: 3s
plot: [2,3] [==================---------------------------------------] 32% est: 2s
plot: [2,4] [=====================------------------------------------] 36% est: 2s
plot: [2,5] [=======================----------------------------------] 40% est: 2s
plot: [3,1] [=========================--------------------------------] 44% est: 2s
plot: [3,2] [===========================------------------------------] 48% est: 2s
plot: [3,3] [==============================---------------------------] 52% est: 1s
plot: [3,4] [================================-------------------------] 56% est: 1s
plot: [3,5] [==================================-----------------------] 60% est: 1s
plot: [4,1] [====================================---------------------] 64% est: 1s
plot: [4,2] [=======================================------------------] 68% est: 1s
plot: [4,3] [=========================================----------------] 72% est: 1s
plot: [4,4] [===========================================--------------] 76% est: 1s
plot: [4,5] [==============================================-----------] 80% est: 0s
plot: [5,1] [================================================---------] 84% est: 0s
plot: [5,2] [==================================================-------] 88% est: 0s
plot: [5,3] [====================================================-----] 92% est: 0s
plot: [5,4] [=======================================================--] 96% est: 0s
plot: [5,5] [=========================================================]100% est: 0s

# Same pairs plot of `store`, with Year added as an extra column and used as
# the colour aesthetic.
GGally::ggpairs(
  data = store,
  mapping = ggplot2::aes(color = Year),
  columns = c(base_columns, "Year")
)
plot: [1,1] [==-------------------------------------------------------] 3% est: 0s
plot: [1,2] [===------------------------------------------------------] 6% est: 2s
plot: [1,3] [=====----------------------------------------------------] 8% est: 3s
plot: [1,4] [======---------------------------------------------------] 11% est: 3s
plot: [1,5] [========-------------------------------------------------] 14% est: 2s
plot: [1,6] [==========-----------------------------------------------] 17% est: 2s
plot: [2,1] [===========----------------------------------------------] 19% est: 4s
plot: [2,2] [=============--------------------------------------------] 22% est: 4s
plot: [2,3] [==============-------------------------------------------] 25% est: 3s
plot: [2,4] [================-----------------------------------------] 28% est: 3s
plot: [2,5] [=================----------------------------------------] 31% est: 3s
plot: [2,6] [===================--------------------------------------] 33% est: 3s
plot: [3,1] [=====================------------------------------------] 36% est: 3s
plot: [3,2] [======================-----------------------------------] 39% est: 3s
plot: [3,3] [========================---------------------------------] 42% est: 2s
plot: [3,4] [=========================--------------------------------] 44% est: 2s
plot: [3,5] [===========================------------------------------] 47% est: 2s
plot: [3,6] [============================-----------------------------] 50% est: 2s
plot: [4,1] [==============================---------------------------] 53% est: 2s
plot: [4,2] [================================-------------------------] 56% est: 2s
plot: [4,3] [=================================------------------------] 58% est: 2s
plot: [4,4] [===================================----------------------] 61% est: 2s
plot: [4,5] [====================================---------------------] 64% est: 1s
plot: [4,6] [======================================-------------------] 67% est: 1s
plot: [5,1] [========================================-----------------] 69% est: 1s
plot: [5,2] [=========================================----------------] 72% est: 1s
plot: [5,3] [===========================================--------------] 75% est: 1s
plot: [5,4] [============================================-------------] 78% est: 1s
plot: [5,5] [==============================================-----------] 81% est: 1s
plot: [5,6] [================================================---------] 83% est: 1s
plot: [6,1] [=================================================--------] 86% est: 1s
plot: [6,2] [===================================================------] 89% est: 0s
plot: [6,3] [====================================================-----] 92% est: 0s
plot: [6,4] [======================================================---] 94% est: 0s
plot: [6,5] [=======================================================--] 97% est: 0s
plot: [6,6] [=========================================================]100% est: 0s

# Pairs plot of the `category` data over the base columns, coloured by
# Category. `category` is not defined in this file; presumably it comes from
# process_data.R (confirm there).
GGally::ggpairs(
  data = category,
  mapping = ggplot2::aes(color = Category),
  columns = base_columns
)
plot: [1,1] [==-------------------------------------------------------] 4% est: 0s
plot: [1,2] [=====----------------------------------------------------] 8% est: 1s
plot: [1,3] [=======--------------------------------------------------] 12% est: 2s
plot: [1,4] [=========------------------------------------------------] 16% est: 2s
plot: [1,5] [===========----------------------------------------------] 20% est: 2s
plot: [2,1] [==============-------------------------------------------] 24% est: 2s
plot: [2,2] [================-----------------------------------------] 28% est: 2s
plot: [2,3] [==================---------------------------------------] 32% est: 2s
plot: [2,4] [=====================------------------------------------] 36% est: 2s
plot: [2,5] [=======================----------------------------------] 40% est: 1s
plot: [3,1] [=========================--------------------------------] 44% est: 1s
plot: [3,2] [===========================------------------------------] 48% est: 1s
plot: [3,3] [==============================---------------------------] 52% est: 1s
plot: [3,4] [================================-------------------------] 56% est: 1s
plot: [3,5] [==================================-----------------------] 60% est: 1s
plot: [4,1] [====================================---------------------] 64% est: 1s
plot: [4,2] [=======================================------------------] 68% est: 1s
plot: [4,3] [=========================================----------------] 72% est: 1s
plot: [4,4] [===========================================--------------] 76% est: 1s
plot: [4,5] [==============================================-----------] 80% est: 0s
plot: [5,1] [================================================---------] 84% est: 0s
plot: [5,2] [==================================================-------] 88% est: 0s
plot: [5,3] [====================================================-----] 92% est: 0s
plot: [5,4] [=======================================================--] 96% est: 0s
plot: [5,5] [=========================================================]100% est: 0s

# Pairs plot of the `type` data over the base columns, coloured by Type,
# converted to a plotly widget for display.
p <- GGally::ggpairs(
  data = type,
  mapping = ggplot2::aes(color = Type),
  columns = base_columns
)
plotly::ggplotly(p)
# Linear model of Revenue on Category, Type and Qty_Sold, fitted on `type`.
# NOTE(review): the summary reports one coefficient (TypeRoad) as not defined
# because of singularities; this looks like Type being nested within
# Category -- confirm whether Category should stay in the formula.
lm_fit <- lm(
  formula = Revenue ~ Category + Type + Qty_Sold,
  data = type
)
summary(lm_fit)
Call:
lm(formula = Revenue ~ Category + Type + Qty_Sold, data = type)
Residuals:
Min 1Q Median 3Q Max
-9908.3 -829.8 -113.4 669.7 9003.4
Coefficients: (1 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2415.49 1016.48 -2.376 0.019095 *
CategoryBikes 15021.33 1057.10 14.210 < 2e-16 ***
TypeHelmets -1374.45 944.01 -1.456 0.148056
TypeHybrid -2954.91 864.20 -3.419 0.000863 ***
TypeKids -9991.63 855.47 -11.680 < 2e-16 ***
TypeMountain -2076.47 852.78 -2.435 0.016391 *
TypeParts -9437.96 2844.83 -3.318 0.001208 **
TypeRoad NA NA NA NA
Qty_Sold 106.17 16.83 6.307 5.14e-09 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2558 on 118 degrees of freedom
Multiple R-squared: 0.7644, Adjusted R-squared: 0.7505
F-statistic: 54.71 on 7 and 118 DF, p-value: < 2.2e-16
# Minimum, median and maximum Qty_Sold for each Category/Type combination.
dplyr::summarize(
  dplyr::group_by(type, Category, Type),
  min = min(Qty_Sold),
  median = median(Qty_Sold),
  max = max(Qty_Sold)
)
# Bootstrap sample: draw 10000 rows with replacement from the
# Category/Type/Qty_Sold columns of `type`.
boot_type <- dplyr::sample_n(
  dplyr::select(type, Category, Type, Qty_Sold),
  10000,
  replace = TRUE
)

# Attach the model's predicted Revenue for every resampled row.
boot_type[["Revenue"]] <- predict(lm_fit, newdata = boot_type)
# Requested quantity (`input`) for each Category/Type pair, parsed from an
# inline whitespace-separated table.
# NOTE(review): the "Acessories" spelling is used as a join key later on, so
# it presumably mirrors the Category values in the data -- confirm before
# correcting it.
input_data_frame <- read.table(
  text = '
Category Type input
Acessories Clothes 60
Acessories Helmets 70
Acessories Parts 200
Bikes Hybrid 20
Bikes Kids 11
Bikes Mountain 10
Bikes Road 11
',
  stringsAsFactors = FALSE,
  header = TRUE
)
#' Keep the rows whose quantity bucket brackets the requested quantity
#'
#' Within each Category/Type group the rows of `dat` are split into ten
#' `Qty_Sold` buckets (stored in a column named `percentile`), and every row
#' is tagged with the minimum and maximum `Qty_Sold` of its bucket. The
#' result is joined to `input_data_frame` on Category and Type, and only rows
#' with `min < input <= max` are kept. Rows without a matching `input` have
#' `input = NA` after the left join and are dropped by the filter.
#'
#' @param dat Data frame with `Category`, `Type` and `Qty_Sold` columns; any
#'   other columns are carried through unchanged.
#' @param input_data_frame Data frame with `Category`, `Type` and `input`
#'   columns.
#' @return An ungrouped data frame holding the retained rows of `dat` plus
#'   the `percentile`, `min`, `max` and `input` columns.
get_revenue_prediction <- function(dat, input_data_frame) {
  dat %>%
    dplyr::group_by(Category, Type) %>%
    dplyr::mutate(percentile = dplyr::ntile(Qty_Sold, 10)) %>%
    dplyr::group_by(Category, Type, percentile) %>%
    dplyr::mutate(min = min(Qty_Sold), max = max(Qty_Sold)) %>%
    dplyr::left_join(input_data_frame, by = c("Category", "Type")) %>%
    dplyr::filter(max >= input & min < input) %>%
    # Drop the grouping before returning: a grouped result makes a later
    # select() silently re-add `percentile`, which changes the column set
    # callers expect.
    dplyr::ungroup()
}
# Bootstrap rows that fall in the quantity bucket matching each requested
# input, labelled "Selected".
pred <- get_revenue_prediction(boot_type, input_data_frame) %>%
  # Make sure no grouping is left over; otherwise select() re-adds the
  # grouping columns and rbind() below sees mismatched column sets.
  dplyr::ungroup() %>%
  dplyr::select(Category, Type, Revenue) %>%
  dplyr::mutate(Data = "Selected")

# The whole bootstrap sample, labelled "All", as the comparison baseline.
plot_data <- boot_type %>%
  dplyr::select(Category, Type, Revenue) %>%
  dplyr::mutate(Data = "All")
plot_data <- rbind(pred, plot_data)

# Compare predicted Revenue between the two subsets. The earlier
# `geom_point(stat = "median")` aborted with "Found object is not a stat."
# because "median" does not name a ggplot2 stat; a boxplot shows the median
# of each subset together with its spread.
p <- ggplot(plot_data, aes(Data, Revenue)) + geom_boxplot()
print(p)
LS0tDQp0aXRsZTogIkJpa2UgUGVkZGxlciINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCmBgYHtyLCBtZXNzYWdlPUZBTFNFfQ0KbGlicmFyeShnZ3Bsb3QyKQ0KYGBgDQoNCmBgYHtyLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQ0Kc291cmNlKCJwcm9jZXNzX2RhdGEuUiIpDQpiYXNlX2NvbHVtbnMgPC0gbmFtZXMoc3RvcmUpWyFuYW1lcyhzdG9yZSkgJWluJSBjKCJZZWFyIiwgIk1vbnRoIildDQpgYGANCg0KYGBge3J9DQpHR2FsbHk6OmdncGFpcnMoc3RvcmUsIGNvbHVtbnMgPSBiYXNlX2NvbHVtbnMpDQpgYGANCg0KYGBge3IsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQpHR2FsbHk6OmdncGFpcnMoc3RvcmUsIGNvbHVtbnMgPSBjKGJhc2VfY29sdW1ucywgIlllYXIiKSwNCiAgICAgICAgICAgICAgICBtYXBwaW5nID0gZ2dwbG90Mjo6YWVzKGNvbG9yID0gWWVhcikpDQpgYGANCg0KYGBge3IsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQpHR2FsbHk6OmdncGFpcnMoY2F0ZWdvcnksIGNvbHVtbnMgPSBiYXNlX2NvbHVtbnMsDQogICAgICAgICAgICAgICAgbWFwcGluZyA9IGdncGxvdDI6OmFlcyhjb2xvciA9IENhdGVnb3J5KSkNCmBgYA0KDQpgYGB7ciwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCnAgPC0gR0dhbGx5OjpnZ3BhaXJzKHR5cGUsIGNvbHVtbnMgPSBiYXNlX2NvbHVtbnMsDQogICAgICAgICAgICAgICAgICAgICBtYXBwaW5nID0gZ2dwbG90Mjo6YWVzKGNvbG9yID0gVHlwZSkpIA0KcGxvdGx5OjpnZ3Bsb3RseShwKQ0KYGBgDQoNCg0KDQoNCg0KDQoNCmBgYHtyfQ0KbG1fZml0IDwtIGxtKFJldmVudWUgfiBDYXRlZ29yeSArIFR5cGUgKyBRdHlfU29sZCwgZGF0YSA9IHR5cGUpDQpzdW1tYXJ5KGxtX2ZpdCkNCg0KYGBgDQoNCmBgYHtyfQ0KdHlwZSAlPiUNCiAgZHBseXI6Omdyb3VwX2J5KENhdGVnb3J5LCBUeXBlKSAlPiUNCiAgZHBseXI6OnN1bW1hcml6ZShtaW4gPSBtaW4oUXR5X1NvbGQpLCBtZWRpYW4gPSBtZWRpYW4oUXR5X1NvbGQpLCANCiAgICAgICAgICAgICAgICAgICBtYXggPSBtYXgoUXR5X1NvbGQpKQ0KYGBgDQoNCg0KYGBge3IsIHdhcm5pbmc9RkFMU0V9DQpib290X3R5cGUgPC0gdHlwZSAlPiUNCiAgZHBseXI6OnNlbGVjdChDYXRlZ29yeSwgVHlwZSwgUXR5X1NvbGQpICU+JQ0KICBzYW1wbGVfbigxMDAwMCwgcmVwbGFjZSA9IFRSVUUpDQpib290X3R5cGUkUmV2ZW51ZSA8LSBwcmVkaWN0KGxtX2ZpdCwgYm9vdF90eXBlKQ0KDQoNCmBgYA0KDQpgYGB7cn0NCg0KaW5wdXRfZGF0YV9mcmFtZSA8LSByZWFkLnRhYmxlKGhlYWRlciA9IFRSVUUsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSwgdGV4dCA9ICcNCkNhdGVnb3J5IFR5cGUgaW5wdXQNCkFjZXNzb3JpZXMJQ2xvdGhlcwk2MA0KQWNlc3NvcmllcwlIZWxtZXRzCTcwDQpBY2Vzc29yaWVzCVBhcnRzIDIwMA0KQmlrZXMJSHlicmlkIDIwDQpCaWtlcwlLaWRzIDExDQpCaWtlcwlNb3Vu
dGFpbiAxMAkNCkJpa2VzCVJvYWQgMTENCicpDQoNCmdldF9yZXZlbnVlX3ByZWRpY3Rpb24gPC0gZnVuY3Rpb24oZGF0LCBpbnB1dF9kYXRhX2ZyYW1lKSB7DQogICMgZGF0IDwtIGJvb3RfdHlwZQ0KICANCiAgcGVyY2VudGlsZXMgPC0gZGF0ICU+JQ0KICAgIGRwbHlyOjpncm91cF9ieShDYXRlZ29yeSwgVHlwZSkgJT4lDQogICAgZHBseXI6Om11dGF0ZShwZXJjZW50aWxlID0gbnRpbGUoUXR5X1NvbGQsIDEwKSkgJT4lDQogICAgZHBseXI6Omdyb3VwX2J5KENhdGVnb3J5LCBUeXBlLCBwZXJjZW50aWxlKSAlPiUNCiAgICBkcGx5cjo6bXV0YXRlKG1pbiA9IG1pbihRdHlfU29sZCksIG1heCA9IG1heChRdHlfU29sZCkpICU+JQ0KICAgIGRwbHlyOjpsZWZ0X2pvaW4oaW5wdXRfZGF0YV9mcmFtZSwgYygiQ2F0ZWdvcnkiLCAiVHlwZSIpKSAlPiUNCiAgICBkcGx5cjo6ZmlsdGVyKG1heCA+PSBpbnB1dCAmIG1pbiA8IGlucHV0KSAlPiUNCiAgICBkcGx5cjo6dW5ncm91cCgpDQoNCiAgcGVyY2VudGlsZXMNCn0NCmBgYA0KDQpgYGB7cn0NCnByZWQgPC0gZ2V0X3JldmVudWVfcHJlZGljdGlvbihib290X3R5cGUsIGlucHV0X2RhdGFfZnJhbWUpICU+JQ0KICBkcGx5cjo6c2VsZWN0KENhdGVnb3J5LCBUeXBlLCBSZXZlbnVlKSAlPiUNCiAgZHBseXI6Om11dGF0ZShEYXRhID0gIlNlbGVjdGVkIikNCg0KcGxvdF9kYXRhIDwtIGJvb3RfdHlwZSAlPiUNCiAgZHBseXI6OnNlbGVjdChDYXRlZ29yeSwgVHlwZSwgUmV2ZW51ZSkgJT4lDQogIGRwbHlyOjptdXRhdGUoRGF0YSA9ICJBbGwiKQ0KDQpwbG90X2RhdGEgPC0gcmJpbmQocHJlZCwgcGxvdF9kYXRhKQ0KDQpwIDwtIGdncGxvdChwbG90X2RhdGEsIGFlcyhEYXRhLCBSZXZlbnVlKSkgKyBnZW9tX2JveHBsb3QoKQ0KcHJpbnQocCkNCmBgYA==